cleanup before you start

rm( list=ls() )
source('../lib/utilities.R')
library(moments)
save.plots = TRUE 

setup

D <- read.table('../data/gdp-us-grate.csv')

dates <- as.Date(as.character(D[,1]),'%Y-%m-%d')

H <- 6
N <- nrow(D)-6
y <- D[1:N,2]
y.out <-D[(N+1):(N+H),2]
# LBQ Test
Box.test( y, lag=22 , type="Ljung-Box" )
## 
##  Box-Ljung test
## 
## data:  y
## X-squared = 84.824, df = 22, p-value = 2.563e-09
# ACF & PACF
par( mar=c(2,2,1,1) , mfrow=c(2,1) )
acf( y , ylim=c(-0.2,1) , lwd=5 , xlim=c(0,25) , col='darkorange2' , tck=0.02)
legend('topright',c('ACF'),col=c('darkorange2'),lwd=3)
pacf( y , ylim=c(-0.2,1) , lwd=5 , xlim=c(0,25) , col='darkorange2' , tck=0.02)
legend('topright',c('PACF'),col=c('darkorange2'),lwd=3)

Estimate ARMA Models

ar1    = arima(y,order=c(1,0,0))
ma1    = arima(y,order=c(0,0,1))
ma2    = arima(y,order=c(0,0,2))
arma11 = arima(y,order=c(1,0,1))

# GoF
ar1_aic    <- (-2*ar1$loglik+2*3)/N
ma1_aic    <- (-2*ma1$loglik+2*3)/N
ma2_aic    <- (-2*ma2$loglik+2*4)/N
arma11_aic <- (-2*arma11$loglik+2*4)/N

ar1_bic    <- (-2*ar1$loglik+log(N)*3)/N
ma1_bic    <- (-2*ma1$loglik+log(N)*3)/N
ma2_bic    <- (-2*ma2$loglik+log(N)*4)/N
arma11_bic <- (-2*arma11$loglik+log(N)*4)/N

round( rbind( c(ar1$loglik,ma1$loglik.ma2$loglik,arma11$loglik), 
              c(ar1_aic,ma1_aic,ma2_aic,arma11_aic) , 
              c(ar1_bic,ma1_bic,ma2_bic,arma11_bic) ) ,  3 )
##          [,1]     [,2]     [,3]     [,4]
## [1,] -767.611 -766.537 -767.611 -766.537
## [2,]    5.351    5.389    5.341    5.351
## [3,]    5.390    5.427    5.391    5.402

FITTED VALUES

ar1_mu     <- y-ar1$residuals
ma1_mu     <- y-ma1$residuals
ma2_mu     <- y-ma2$residuals
arma11_mu  <- y-arma11$residuals
ar1_res    <- as.numeric(ar1$residuals)
ma1_res    <- as.numeric(ma1$residuals)
ma2_res    <- as.numeric(ma2$residuals)
arma11_res <- as.numeric(arma11$residuals)

Plot a bunch of stuff..

# AR1
par( mar=c(2,2,1,1) , xaxs="i" , mfrow=c(2,1) )
myplot( dates[1:N] , ar1_mu , t='l' , lwd=2 , col='blue3' , ylim=c(0,10) )
grid( lwd=1 , col="darkgrey" )
myplot( dates[1:N] , ar1_res/sd(ar1_res) , col='purple' , t='p'  , ylim=c(-4,4) )
abline( h=0 , lwd=2 )
grid( lwd=1 , col="darkgrey" )

par( mar=c(2,2,1,1) , mfrow=c(2,1) )
acf( ar1_res , ylim=c(-0.2,1) , lwd=5 , xlim=c(0,25) , col='darkorange2' , tck=0.02)
legend('topright',c('ACF'),col=c('darkorange2'),lwd=3)
pacf( ar1_res , ylim=c(-0.2,1) , lwd=5 , xlim=c(0,25) , col='darkorange2' , tck=0.02)
legend('topright',c('PACF'),col=c('darkorange2'),lwd=3)

# MA1
par( mar=c(2,2,1,1) , xaxs="i" , mfrow=c(2,1) )
myplot( dates[1:N] , ma1_mu , col='blue3' , ylim=c(0,10) )
grid( lwd=1 , col="darkgrey" )
myplot( dates[1:N] , ma1_res/sd(ma1_res) , col='purple' , t='p' , ylim=c(-4,4) )
abline( h=0 , lwd=2 )
grid( lwd=1 , col="darkgrey" )

par( mar=c(2,2,1,1) , mfrow=c(2,1) )
acf( ma1_res , ylim=c(-0.2,1) , lwd=5 , xlim=c(0,25) , col='darkorange2' , tck=0.02)
legend('topright',c('ACF'),col=c('darkorange2'),lwd=3)
pacf( ma1_res , ylim=c(-0.2,1) , lwd=5 , xlim=c(0,25) , col='darkorange2' , tck=0.02)
legend('topright',c('PACF'),col=c('darkorange2'),lwd=3)

# MA2
par( mar=c(2,2,1,1) , xaxs="i" , mfrow=c(2,1) )
myplot( dates[1:N] , ma2_mu , t='l' , lwd=2 , col='blue3', ylim=c(0,10) )
grid( lwd=1 , col="darkgrey" )
myplot( dates[1:N] , ma2_res/sd(ma2_res) , col='purple' , t='p' , ylim=c(-4,4)  )
abline( h=0 , lwd=2 )
grid( lwd=1 , col="darkgrey" )

par( mar=c(2,2,1,1) , mfrow=c(2,1) )
acf( ma2_res , ylim=c(-0.2,1) , lwd=5 , xlim=c(0,25) , col='darkorange2' , tck=0.02)
legend('topright',c('ACF'),col=c('darkorange2'),lwd=3)
pacf( ma2_res , ylim=c(-0.2,1) , lwd=5 , xlim=c(0,25) , col='darkorange2' , tck=0.02)
legend('topright',c('PACF'),col=c('darkorange2'),lwd=3)

# ARMA11
par( mar=c(2,2,1,1) , xaxs="i" , mfrow=c(2,1) )
myplot( dates[1:N] , arma11_mu , lwd=2 , col='blue3' , ylim=c(0,10))
grid( lwd=1 , col="darkgrey" )
myplot( dates[1:N] , arma11_res/sd(arma11_res) , col='purple' , t='p' , ylim=c(-4,4) )
abline( h=0 , lwd=2 )
grid( lwd=1 , col="darkgrey" )

par( mar=c(2,2,1,1) , mfrow=c(2,1) )
acf( arma11_res , ylim=c(-0.2,1) , lwd=5 , xlim=c(0,25) , col='darkorange2' , tck=0.02)
legend('topright',c('ACF'),col=c('darkorange2'),lwd=3)
pacf( arma11_res , ylim=c(-0.2,1) , lwd=5 , xlim=c(0,25) , col='darkorange2' , tck=0.02)
legend('topright',c('PACF'),col=c('darkorange2'),lwd=3)

Residuals

par( mar=c(2,2,3,2) , mfrow=c(2,2) )
kernel <- density(ar1_res/sqrt(ar1$sigma2))
plot( kernel , main='AR1' )
polygon( kernel , col="tomato" , border='darkred')
abline(h=0,lwd=2)
lines( seq(-10,20,0.1) , dnorm( seq(-10,20,0.1) ) , col='darkblue' ,lwd=2 )

kernel <- density(ma1_res/sqrt(ma1$sigma2))
plot( kernel , main='MA1' )
polygon( kernel , col="tomato" , border='darkred')
abline(h=0,lwd=2)
lines( seq(-10,20,0.1) , dnorm( seq(-10,20,0.1) ) , col='darkblue' ,lwd=2 )

kernel <- density(ma2_res/sqrt(ma2$sigma2))
plot( kernel , main='MA2' )
polygon( kernel , col="tomato" , border='darkred')
abline(h=0,lwd=2)
lines( seq(-10,20,0.1) , dnorm( seq(-10,20,0.1) ) , col='darkblue' ,lwd=2 )

kernel <- density(arma11_res/sqrt(arma11$sigma2))
plot( kernel , main='ARMA11' )
polygon( kernel , col="tomato" , border='darkred')
abline(h=0,lwd=2)
lines( seq(-10,20,0.1) , dnorm( seq(-10,20,0.1) ) , col='darkblue' ,lwd=2 )

par( mar=c(2,2,3,2) , mfrow=c(2,2) )

qqnorm(ar1_res,col='tomato',main='AR1')
qqline(ar1_res,lwd=2,lty=3)
qqnorm(ma1_res,col='tomato',main='MA1')
qqline(ma1_res,lwd=2,lty=3)
qqnorm(ma2_res,col='tomato',main='MA2')
qqline(ma2_res,lwd=2,lty=3)
qqnorm(arma11_res,col='tomato',main='ARMA11')
qqline(arma11_res,lwd=2,lty=3)

FORECAST EXERCISE 1

ar1_pred    <- predict( ar1 , n.ahead=H )
ma1_pred    <- predict( ma1 , n.ahead=H )
ma2_pred    <- predict( ma2 , n.ahead=H )
arma11_pred <- predict( arma11 , n.ahead=H )

ar1_mse    = sqrt( mean( (y.out - as.numeric(ar1_pred$pred) )**2 ) )
ma1_mse    = sqrt( mean( (y.out - as.numeric(ma1_pred$pred) )**2 ) )
ma2_mse    = sqrt( mean( (y.out - as.numeric(ma2_pred$pred) )**2 ) )
arma11_mse = sqrt( mean( (y.out - as.numeric(arma11_pred$pred) )**2 ) )

myplot( dates[(N-10):(N+H)] , c(y[(N-10):N], y.out) , t='b', main=sprintf('AR(1) RMSE %3.3f',ar1_mse) , ylim=c(0,7) , col='darkorange' ) 
abline( v=dates[N] , lwd=2 )
abline( h=ar1$coef['intercept'] , lwd=2 )
lines( dates[(N-10):N] , ar1_mu[(N-10):N] , t='l' , lwd=2 , col='blue3' )
lines( dates[(N+1):(N+H)] , as.numeric(ar1_pred$pred)  , t='b' , lwd=2 , col='blue3' )

myplot( dates[(N-10):(N+H)] , c(y[(N-10):N], y.out) , t='b' , main=sprintf('MA(1) RMSE %3.3f',ma1_mse) , ylim=c(0,7) , col='darkorange' ) 
abline( v=dates[N] , lwd=2 )
abline( h=ma1$coef['intercept'] , lwd=2 )
lines( dates[(N-10):N] , ma1_mu[(N-10):N] , t='l' , lwd=2 , col='blue3' )
lines( dates[(N+1):(N+H)] , as.numeric(ma1_pred$pred)  , t='b' , lwd=2 , col='blue3' )

myplot( dates[(N-10):(N+H)] , c(y[(N-10):N], y.out) , t='b', main=sprintf('MA(2) RMSE %3.3f',ma2_mse) , ylim=c(0,7) , col='darkorange' ) 
abline( v=dates[N] , lwd=2 )
abline( h=ma2$coef['intercept'] , lwd=2 )
lines( dates[(N-10):N] , ma2_mu[(N-10):N] , t='l' , lwd=2 , col='blue3' )
lines( dates[(N+1):(N+H)] , as.numeric(ma2_pred$pred)  , t='b' , lwd=2 , col='blue3' )

myplot( dates[(N-10):(N+H)] , c(y[(N-10):N], y.out) , t='b' , main=sprintf('ARMA(1,1) RMSE %3.3f',arma11_mse) , ylim=c(0,7) , col='darkorange' ) 
abline( v=dates[N] , lwd=2 )
abline( h=arma11$coef['intercept'] , lwd=2 )
lines( dates[(N-10):N] , arma11_mu[(N-10):N] , t='l' , lwd=2 , col='blue3' )
lines( dates[(N+1):(N+H)] , as.numeric(arma11_pred$pred)  , t='b' , lwd=2 , col='blue3' )

round(c( ar1_mse , ma1_mse , ma2_mse ,  arma11_mse )*100 , 3 )
## [1] 1997.261 1997.538 1997.163 1996.688

FORECAST EXERCISE 2

ar1_pred    <- rep(0,H)
ma1_pred    <- rep(0,H)
ma2_pred    <- rep(0,H)
arma11_pred <- rep(0,H)

for( m in 0:(H-1) ){
  
  y <- D[1:(N+m),2]

  ar1    = arima(y,order=c(1,0,0))
  ma1    = arima(y,order=c(0,0,1))
  ma2    = arima(y,order=c(0,0,2))
  arma11 = arima(y,order=c(1,0,1))

  ar1_pred[1+m]    <- predict( ar1 , n.ahead=1 )$pred
  ma1_pred[1+m]    <- predict( ma1 , n.ahead=1 )$pred
  ma2_pred[1+m]    <- predict( ma2 , n.ahead=1 )$pred
  arma11_pred[1+m] <- predict( arma11 , n.ahead=1 )$pred
}

ar1_r2    = 1-mean( (y.out - ar1_pred )**2 )/mean( (mean(y)-y.out)**2 )
ma1_r2    = 1-mean( (y.out - ma1_pred )**2 )/mean( (mean(y)-y.out)**2 )
ma2_r2    = 1-mean( (y.out - ma2_pred )**2 )/mean( (mean(y)-y.out)**2 )
arma11_r2 = 1-mean( (y.out - arma11_pred )**2 )/mean( (mean(y)-y.out)**2 )

myplot( dates[(N-10):(N+H)] , c(y[(N-10):N], y.out) , t='b', main=sprintf('AR(1) R2 %3.3f',ar1_r2) , ylim=c(0,7) , col='darkorange' ) 
abline( v=dates[N] , lwd=2 )
abline( h=ar1$coef['intercept'] , lwd=2 )
lines( dates[(N-10):N] , ar1_mu[(N-10):N] , t='l' , lwd=2 , col='blue3' )
lines( dates[(N+1):(N+H)] , ar1_pred  , t='b' , lwd=2 , col='blue3' )

myplot( dates[(N-10):(N+H)] , c(y[(N-10):N], y.out) , t='b' , main=sprintf('MA(1) R2 %3.3f',ma1_r2) , ylim=c(0,7) , col='darkorange' ) 
abline( v=dates[N] , lwd=2 )
abline( h=ma1$coef['intercept'] , lwd=2 )
lines( dates[(N-10):N] , ma1_mu[(N-10):N] , t='l' , lwd=2 , col='blue3' )
lines( dates[(N+1):(N+H)] , ma1_pred  , t='b' , lwd=2 , col='blue3' )

myplot( dates[(N-10):(N+H)] , c(y[(N-10):N], y.out) , t='b', main=sprintf('MA(2) R2 %3.3f',ma2_r2) , ylim=c(0,7) , col='darkorange' ) 
abline( v=dates[N] , lwd=2 )
abline( h=ma2$coef['intercept'] , lwd=2 )
lines( dates[(N-10):N] , ma2_mu[(N-10):N] , t='l' , lwd=2 , col='blue3' )
lines( dates[(N+1):(N+H)] , ma2_pred  , t='b' , lwd=2 , col='blue3' )

myplot( dates[(N-10):(N+H)] , c(y[(N-10):N], y.out) , t='b' , main=sprintf('ARMA(1,1) R2 %3.3f',arma11_r2) , ylim=c(0,7) , col='darkorange' ) 
abline( v=dates[N] , lwd=2 )
abline( h=arma11$coef['intercept'] , lwd=2 )
lines( dates[(N-10):N] , arma11_mu[(N-10):N] , t='l' , lwd=2 , col='blue3' )
lines( dates[(N+1):(N+H)] , arma11_pred  , t='b' , lwd=2 , col='blue3' )

round(c( ar1_r2 , ma1_r2 , ma2_r2 ,  arma11_r2 )*100 , 3 )
## [1] -42.544 -25.992 -39.463 -41.733